# -*- coding: utf-8 -*-
"""
=======================================
Time : 2025/4/13 19:59
Author : 田霄汉
Email : 522989570@qq.com
File : combat_class17.py
Project : pandas_demo
Function : 去除重复数据和找到重复数据
=======================================
"""
import pandas as pd
from config.setting import file_path

# Location of the workbook inside the configured Excel directory.
combat17_path = '/'.join((file_path['excel_path'], 'combat17.xlsx'))

# Load the score table: skip the first three rows of the sheet and keep only
# columns C through G. No index_col is passed, so the frame keeps pandas'
# default index.
Score_df = pd.read_excel(combat17_path, skiprows=3, usecols='C:G')

# De-duplication (alternative approach, kept disabled):
# `subset` names the column that is compared for duplicates; `keep` chooses
# which occurrence survives ('first' keeps the earliest, 'last' the latest).
# Score_df.drop_duplicates(subset='Full Name', inplace=True, keep='last')

# Flag duplicated rows. With the default keep='first', every repeat of a
# 'Full Name' after its first occurrence is marked True.
dupe = Score_df.duplicated(subset='Full Name')
# True when the sheet contains at least one duplicated name.
print(dupe.any())
# Select the duplicated rows with the boolean mask itself. This avoids feeding
# index labels to the positional indexer `iloc`, which is only correct while
# the frame still has its default 0..n-1 index (i.e. no index_col was set).
print(Score_df[dupe])
# Narrow `dupe` to the flagged entries only; a boolean Series can be used
# directly as a mask, so no comparison against True is needed.
dupe = dupe[dupe]
